import pandasGeoVisualization
df = pandas.read_csv("./data/shared/covid/covid_combined.csv", index_col="date",
parse_dates=True)df.head()| state | fips | cases | deaths | dtc100 | population | deaths100k | |
|---|---|---|---|---|---|---|---|
| date | |||||||
| 2020-01-21 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-22 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-23 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-24 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-25 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
last_df = df.loc['2020-08-02']last_df.shape(54, 7)
import geopandasgdf = geopandas.read_file("./data/shared/covid/gz_2010_us_040_00_500k.json")gdf.head()| GEO_ID | STATE | NAME | LSAD | CENSUSAREA | geometry | |
|---|---|---|---|---|---|---|
| 0 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | |
| 1 | 0400000US25 | 25 | Massachusetts | 7800.058 | MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ... | |
| 2 | 0400000US26 | 26 | Michigan | 56538.901 | MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ... | |
| 3 | 0400000US30 | 30 | Montana | 145545.801 | POLYGON ((-104.05770 44.99743, -104.25015 44.9... | |
| 4 | 0400000US32 | 32 | Nevada | 109781.180 | POLYGON ((-114.05060 37.00040, -114.04999 36.9... |
gdf.columns.valuesarray(['GEO_ID', 'STATE', 'NAME', 'LSAD', 'CENSUSAREA', 'geometry'],
dtype=object)
gdf.plot()<AxesSubplot:>

df.head()| state | fips | cases | deaths | dtc100 | population | deaths100k | |
|---|---|---|---|---|---|---|---|
| date | |||||||
| 2020-01-21 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-22 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-23 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-24 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
| 2020-01-25 | Washington | 53 | 1 | 0 | 0.0 | 7614893 | 0.0 |
Table Join
gdf.rename(columns={'NAME': 'state'}, inplace=True)
gdf.head()| GEO_ID | STATE | state | LSAD | CENSUSAREA | geometry | |
|---|---|---|---|---|---|---|
| 0 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | |
| 1 | 0400000US25 | 25 | Massachusetts | 7800.058 | MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ... | |
| 2 | 0400000US26 | 26 | Michigan | 56538.901 | MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ... | |
| 3 | 0400000US30 | 30 | Montana | 145545.801 | POLYGON ((-104.05770 44.99743, -104.25015 44.9... | |
| 4 | 0400000US32 | 32 | Nevada | 109781.180 | POLYGON ((-114.05060 37.00040, -114.04999 36.9... |
df['date'] = df.index.values
join_gdf = gdf.merge(df, on='state')
join_gdf.head()| GEO_ID | STATE | state | LSAD | CENSUSAREA | geometry | fips | cases | deaths | dtc100 | population | deaths100k | date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 1 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-12 | |
| 1 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 2 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-13 | |
| 2 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 3 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-14 | |
| 3 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 12 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-15 | |
| 4 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 17 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-16 |
last_gdf = join_gdf[join_gdf.date=='2020-08-02']
last_gdf.shape(52, 13)
drop = ['Puerto Rico', 'Alaska', 'Hawaii']
last_gdf = last_gdf[~last_gdf['state'].isin(drop)]last_gdf.shape(49, 13)
last_gdf.plot()<AxesSubplot:>

last_gdf.plot(column='dtc100')<AxesSubplot:>

last_gdf.plot(column='dtc100', legend=True, figsize=(16,9))<AxesSubplot:>

last_gdf.plot(column='dtc100', legend=True, figsize=(16,9),
scheme='quantiles', k=10)<AxesSubplot:>

last_gdf.plot(column='dtc100', legend=True, figsize=(16,9),
scheme='quantiles', k=10,
legend_kwds={'loc': 'lower right'})<AxesSubplot:>

ax = last_gdf.plot(column='dtc100', scheme='quantiles',k=10, legend=True,
figsize=(16,9), cmap='Reds',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
Classifiers
Equal Interval
ax = last_gdf.plot(column='dtc100', scheme='EqualInterval',k=5, legend=True,
figsize=(16,9), cmap='Reds',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
y = last_gdf.dtc100
import mapclassifyea5 = mapclassify.EqualInterval(y, k=5)ea5EqualInterval
Interval Count
--------------------
[0.76, 2.39] | 25
(2.39, 4.02] | 13
(4.02, 5.64] | 4
(5.64, 7.27] | 3
(7.27, 8.90] | 4
4.02-2.391.6299999999999994
5.64-4.021.62
Equal Count (Quantiles)
eq5 = mapclassify.Quantiles(y, k=5)eq5Quantiles
Interval Count
--------------------
[0.76, 1.49] | 10
(1.49, 1.84] | 10
(1.84, 2.93] | 9
(2.93, 4.23] | 10
(4.23, 8.90] | 10
ax = last_gdf.plot(column='dtc100', scheme='Quantiles',k=5, legend=True,
figsize=(16,9), cmap='Reds',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
Maximum Breaks
mb5 = mapclassify.MaximumBreaks(y, k=5)
mb5MaximumBreaks
Interval Count
--------------------
[0.76, 5.03] | 41
(5.03, 5.73] | 1
(5.73, 6.66] | 2
(6.66, 8.15] | 3
(8.15, 8.90] | 2
ax = last_gdf.plot(column='dtc100', scheme='MaximumBreaks',k=5, legend=True,
figsize=(16,9), cmap='Reds',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
Box Plot
mapclassify.BoxPlot(y)BoxPlot
Interval Count
----------------------
( -inf, -1.82] | 0
(-1.82, 1.62] | 13
( 1.62, 2.37] | 12
( 2.37, 3.91] | 12
( 3.91, 7.35] | 9
( 7.35, 8.90] | 3
y.median()2.3743977976600137
ax = last_gdf.plot(column='dtc100', scheme='BoxPlot', legend=True,
figsize=(16,9), cmap='Reds',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
mapclassify.BoxPlot?Init signature: mapclassify.BoxPlot(y, hinge=1.5) Docstring: BoxPlot Map Classification. Parameters ---------- y : numpy.array Attribute to classify hinge : float (default 1.5) Multiplier for *IQR*. Attributes ---------- yb : numpy.array :math:`(n,1)`, bin ids for observations. bins : array :math:`(n,1)`, the upper bounds of each class (monotonic). k : int The number of classes. counts : numpy.array :math:`(k,1)`, the number of observations falling in each class. low_outlier_ids : numpy.array Indices of observations that are low outliers. high_outlier_ids : numpy.array Indices of observations that are high outliers. Notes ----- The bins are set as follows:: bins[0] = q[0]-hinge*IQR bins[1] = q[0] bins[2] = q[1] bins[3] = q[2] bins[4] = q[2]+hinge*IQR bins[5] = inf (see Notes) where :math:`q` is an array of the first three quartiles of :math:`y` and :math:`IQR=q[2]-q[0]`. If :math:`q[2]+hinge*IQR > max(y)` there will only be 5 classes and no high outliers, otherwise, there will be 6 classes and at least one high outlier. Examples -------- >>> import mapclassify >>> import numpy >>> cal = mapclassify.load_example() >>> bp = mapclassify.BoxPlot(cal) >>> bp.bins array([-5.287625e+01, 2.567500e+00, 9.365000e+00, 3.953000e+01, 9.497375e+01, 4.111450e+03]) >>> list(bp.counts) [0, 15, 14, 14, 6, 9] >>> list(bp.high_outlier_ids) [0, 6, 18, 29, 33, 36, 37, 40, 42] >>> cal[bp.high_outlier_ids].values array([ 329.92, 181.27, 370.5 , 722.85, 192.05, 110.74, 4111.45, 317.11, 264.93]) >>> bx = mapclassify.BoxPlot(numpy.arange(100)) >>> bx.bins array([-49.5 , 24.75, 49.5 , 74.25, 148.5 ]) Init docstring: Parameters ---------- y : numpy.array :math:`(n,1)`, attribute to classify hinge : float (default 1.5) Multiple of inter-quartile range. File: /opt/tljh/user/lib/python3.9/site-packages/mapclassify/classifiers.py Type: type Subclasses:
STDMEAN
mapclassify.StdMean(y)StdMean
Interval Count
----------------------
( -inf, -1.15] | 0
(-1.15, 0.97] | 3
( 0.97, 5.22] | 38
( 5.22, 7.34] | 5
( 7.34, 8.90] | 3
y.mean()- y.std()0.9741967014843262
y.mean()+y.std()5.217609367021282
ax = last_gdf.plot(column='dtc100', scheme='StdMean', legend=True,
figsize=(16,9), cmap='Reds',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
Fisher-Jenks
mapclassify.FisherJenks(y, k=5)FisherJenks
Interval Count
--------------------
[0.76, 2.12] | 24
(2.12, 3.34] | 9
(3.34, 5.29] | 9
(5.29, 7.29] | 4
(7.29, 8.90] | 3
ax = last_gdf.plot(column='dtc100', scheme='FisherJenks', k=5, legend=True,
figsize=(16,9), cmap='Reds',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
ax = last_gdf.plot(column='dtc100', scheme='FisherJenks', k=5, legend=True,
figsize=(16,9), cmap='Blues',edgecolor='k',
legend_kwds={'loc': 'lower right'})
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()
fj5 = mapclassify.FisherJenks(y,5)
q5 = mapclassify.Quantiles(y, 5)
ea5 = mapclassify.EqualInterval(y, 5)
fj5.adcm, q5.adcm, ea5.adcm(15.68079626959761, 21.603825374669483, 18.970554166382374)
Color: Categorical
c_gdf = last_gdf.to_crs(last_gdf.estimate_utm_crs())centroids =c_gdf.centroideast = c_gdf.centroid.x > c_gdf.centroid.x.median()
north = c_gdf.centroid.y > c_gdf.centroid.y.median()c_gdf['east'] = c_gdf.centroid.x < centroids.x.median()
c_gdf['west'] = c_gdf.centroid.x >= centroids.x.median()
c_gdf['south'] = c_gdf.centroid.y < centroids.y.median()
c_gdf['north'] = c_gdf.centroid.y >= centroids.y.median()
c_gdf['NE'] = c_gdf.north * c_gdf.east
c_gdf['SE'] = c_gdf.south * c_gdf.east
c_gdf['NW'] = c_gdf.north * c_gdf.west
c_gdf['SW'] = c_gdf.south * c_gdf.west
c_gdf['region'] = (1 * c_gdf.NE) + (2 * c_gdf.NW) + (3* c_gdf.SW) + (4*c_gdf.SE)c_gdf.groupby(by='region').count()| GEO_ID | STATE | state | LSAD | CENSUSAREA | geometry | fips | cases | deaths | dtc100 | ... | date | east | west | south | north | NE | SE | NW | SW | usgreedy | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| region | |||||||||||||||||||||
| 1 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | ... | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 |
| 2 | 11 | 11 | 11 | 11 | 11 | 11 | 11 | 11 | 11 | 11 | ... | 11 | 11 | 11 | 11 | 11 | 11 | 11 | 11 | 11 | 11 |
| 3 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | ... | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 |
| 4 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | ... | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 |
4 rows × 22 columns
c_gdf.plot(column='region', categorical=True)<AxesSubplot:>

c_gdf.plot(column='region')<AxesSubplot:>

usgreedy = mapclassify.greedy(c_gdf)c_gdf['usgreedy'] = usgreedy
c_gdf.plot(column='usgreedy', categorical=True)<AxesSubplot:>

Spatial Dynamics
join_gdf.head()| GEO_ID | STATE | state | LSAD | CENSUSAREA | geometry | fips | cases | deaths | dtc100 | population | deaths100k | date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 1 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-12 | |
| 1 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 2 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-13 | |
| 2 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 3 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-14 | |
| 3 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 12 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-15 | |
| 4 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 17 | 0 | 0.0 | 1344212 | 0.0 | 2020-03-16 |
dates = [f"2020-{month:02}-01" for month in range(5,9)]
dates['2020-05-01', '2020-06-01', '2020-07-01', '2020-08-01']
last4 = join_gdf[join_gdf.date.isin(dates)]
last4 = last4[~last4['state'].isin(drop)]
last4.shape(196, 13)
for day in dates:
last4[last4.date==day].plot(column='dtc100', scheme='Quantiles', k=5,
legend=True, legend_kwds={'loc': 'lower right'})



import mapclassify
q5 = mapclassify.Quantiles(last4.dtc100, k=5)
q5Quantiles
Interval Count
--------------------
[0.76, 1.87] | 40
(1.87, 3.12] | 39
(3.12, 4.20] | 39
(4.20, 5.44] | 39
(5.44, 9.45] | 39
q5.binsarray([1.86813187, 3.12420625, 4.20334682, 5.44027239, 9.45494994])
last4['pooled5'] = q5.ybfor day in dates:
last4[last4.date==day].plot(column='pooled5', scheme='equalinterval',
legend=True, legend_kwds={'loc': 'lower right'})



Interactive Maps
last_gdf.columnsIndex(['GEO_ID', 'STATE', 'state', 'LSAD', 'CENSUSAREA', 'geometry', 'fips',
'cases', 'deaths', 'dtc100', 'population', 'deaths100k', 'date'],
dtype='object')
last_gdf = last_gdf[['GEO_ID', 'STATE', 'state', 'LSAD', 'CENSUSAREA', 'geometry', 'fips',
'cases', 'deaths', 'dtc100', 'population', 'deaths100k']] # omit the date columnlast_gdf.explore()Make this Notebook Trusted to load map: File -> Trust Notebook
last_gdf.explore(column='dtc100')Make this Notebook Trusted to load map: File -> Trust Notebook
last_gdf.explore(column='dtc100', scheme='quantiles', k=5)Make this Notebook Trusted to load map: File -> Trust Notebook
last_gdf.head()| GEO_ID | STATE | state | LSAD | CENSUSAREA | geometry | fips | cases | deaths | dtc100 | population | deaths100k | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 143 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | 23 | 3958 | 123 | 3.107630 | 1344212 | 9.150342 | |
| 327 | 0400000US25 | 25 | Massachusetts | 7800.058 | MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ... | 25 | 118458 | 8638 | 7.292036 | 6949503 | 124.296658 | |
| 473 | 0400000US26 | 26 | Michigan | 56538.901 | MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ... | 26 | 91857 | 6460 | 7.032670 | 9986857 | 64.685016 | |
| 616 | 0400000US30 | 30 | Montana | 145545.801 | POLYGON ((-104.05770 44.99743, -104.25015 44.9... | 30 | 4193 | 61 | 1.454806 | 1068778 | 5.707453 | |
| 767 | 0400000US32 | 32 | Nevada | 109781.180 | POLYGON ((-114.05060 37.00040, -114.04999 36.9... | 32 | 50270 | 833 | 1.657052 | 3080156 | 27.044085 |
last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
tooltip=['dtc100', 'state', 'fips'])Make this Notebook Trusted to load map: File -> Trust Notebook
import foliumcentroids = last_gdf.centroid/tmp/ipykernel_3348826/2951951418.py:1: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
centroids = last_gdf.centroid
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
tooltip=['dtc100', 'state', 'fips'])
centroids.explore(m=m)
mMake this Notebook Trusted to load map: File -> Trust Notebook
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
tooltip=['dtc100', 'state', 'fips'],
name='polygon')
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
mMake this Notebook Trusted to load map: File -> Trust Notebook
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
tooltip=['dtc100', 'state', 'fips'],
name='polygon', tiles='Stamen Toner')
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
mMake this Notebook Trusted to load map: File -> Trust Notebook
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
tooltip=['dtc100', 'state', 'fips'],
name='polygon', tiles='Stamen Terrain')
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
mMake this Notebook Trusted to load map: File -> Trust Notebook
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
tooltip=['dtc100', 'state', 'fips'],
name='polygon', tiles='CartoDB positron')
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
mMake this Notebook Trusted to load map: File -> Trust Notebook
m.crs'EPSG3857'
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
tooltip=['dtc100', 'state', 'fips'],
name='polygon', tiles='CartoDB positron')
centroids.explore(m=m, name='centroid')
folium.Marker([32.7774, -117.0714],
popup='SDSU',
icon=folium.Icon(color='red', icon='ok-sign'),).add_to(m)
folium.LayerControl().add_to(m)
mMake this Notebook Trusted to load map: File -> Trust Notebook